In [3]:
import pandas as pd

# Retrieve HTML table data
url = 'https://www.basketball-reference.com/leagues/NBA_2021_per_game.html'
html = pd.read_html(url, header = 0)
df2019 = html[0]
In [4]:
# Data cleaning
raw = df2019.drop(df2019[df2019.Age == 'Age'].index)
raw
Out[4]:
Rk Player Pos Age Tm G GS MP FG FGA ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
0 1 Precious Achiuwa PF 21 MIA 61 4 12.1 2.0 3.7 ... .509 1.2 2.2 3.4 0.5 0.3 0.5 0.7 1.5 5.0
1 2 Jaylen Adams PG 24 MIL 7 0 2.6 0.1 1.1 ... NaN 0.0 0.4 0.4 0.3 0.0 0.0 0.0 0.1 0.3
2 3 Steven Adams C 27 NOP 58 58 27.7 3.3 5.3 ... .444 3.7 5.2 8.9 1.9 0.9 0.7 1.3 1.9 7.6
3 4 Bam Adebayo C 23 MIA 64 64 33.5 7.1 12.5 ... .799 2.2 6.7 9.0 5.4 1.2 1.0 2.6 2.3 18.7
4 5 LaMarcus Aldridge C 35 TOT 26 23 25.9 5.4 11.4 ... .872 0.7 3.8 4.5 1.9 0.4 1.1 1.0 1.8 13.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
726 536 Delon Wright PG 28 SAC 27 8 25.8 3.9 8.3 ... .833 1.0 2.9 3.9 3.6 1.6 0.4 1.3 1.1 10.0
727 537 Thaddeus Young PF 32 CHI 68 23 24.3 5.4 9.7 ... .628 2.5 3.8 6.2 4.3 1.1 0.6 2.0 2.2 12.1
728 538 Trae Young PG 22 ATL 63 63 33.7 7.7 17.7 ... .886 0.6 3.3 3.9 9.4 0.8 0.2 4.1 1.8 25.3
729 539 Cody Zeller C 28 CHO 48 21 20.9 3.8 6.8 ... .714 2.5 4.4 6.8 1.8 0.6 0.4 1.1 2.5 9.4
730 540 Ivica Zubac C 23 LAC 72 33 22.3 3.6 5.5 ... .789 2.6 4.6 7.2 1.3 0.3 0.9 1.1 2.6 9.0

705 rows × 30 columns

In [5]:
raw.shape
Out[5]:
(705, 30)
In [6]:
raw.head()
Out[6]:
Rk Player Pos Age Tm G GS MP FG FGA ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
0 1 Precious Achiuwa PF 21 MIA 61 4 12.1 2.0 3.7 ... .509 1.2 2.2 3.4 0.5 0.3 0.5 0.7 1.5 5.0
1 2 Jaylen Adams PG 24 MIL 7 0 2.6 0.1 1.1 ... NaN 0.0 0.4 0.4 0.3 0.0 0.0 0.0 0.1 0.3
2 3 Steven Adams C 27 NOP 58 58 27.7 3.3 5.3 ... .444 3.7 5.2 8.9 1.9 0.9 0.7 1.3 1.9 7.6
3 4 Bam Adebayo C 23 MIA 64 64 33.5 7.1 12.5 ... .799 2.2 6.7 9.0 5.4 1.2 1.0 2.6 2.3 18.7
4 5 LaMarcus Aldridge C 35 TOT 26 23 25.9 5.4 11.4 ... .872 0.7 3.8 4.5 1.9 0.4 1.1 1.0 1.8 13.5

5 rows × 30 columns

In [7]:
raw.isnull().sum()
Out[7]:
Rk         0
Player     0
Pos        0
Age        0
Tm         0
G          0
GS         0
MP         0
FG         0
FGA        0
FG%        2
3P         0
3PA        0
3P%       35
2P         0
2PA        0
2P%        6
eFG%       2
FT         0
FTA        0
FT%       29
ORB        0
DRB        0
TRB        0
AST        0
STL        0
BLK        0
TOV        0
PF         0
PTS        0
dtype: int64
In [8]:
df=raw.fillna(0)
In [9]:
df.isnull().sum()
Out[9]:
Rk        0
Player    0
Pos       0
Age       0
Tm        0
G         0
GS        0
MP        0
FG        0
FGA       0
FG%       0
3P        0
3PA       0
3P%       0
2P        0
2PA       0
2P%       0
eFG%      0
FT        0
FTA       0
FT%       0
ORB       0
DRB       0
TRB       0
AST       0
STL       0
BLK       0
TOV       0
PF        0
PTS       0
dtype: int64
In [10]:
df
Out[10]:
Rk Player Pos Age Tm G GS MP FG FGA ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
0 1 Precious Achiuwa PF 21 MIA 61 4 12.1 2.0 3.7 ... .509 1.2 2.2 3.4 0.5 0.3 0.5 0.7 1.5 5.0
1 2 Jaylen Adams PG 24 MIL 7 0 2.6 0.1 1.1 ... 0 0.0 0.4 0.4 0.3 0.0 0.0 0.0 0.1 0.3
2 3 Steven Adams C 27 NOP 58 58 27.7 3.3 5.3 ... .444 3.7 5.2 8.9 1.9 0.9 0.7 1.3 1.9 7.6
3 4 Bam Adebayo C 23 MIA 64 64 33.5 7.1 12.5 ... .799 2.2 6.7 9.0 5.4 1.2 1.0 2.6 2.3 18.7
4 5 LaMarcus Aldridge C 35 TOT 26 23 25.9 5.4 11.4 ... .872 0.7 3.8 4.5 1.9 0.4 1.1 1.0 1.8 13.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
726 536 Delon Wright PG 28 SAC 27 8 25.8 3.9 8.3 ... .833 1.0 2.9 3.9 3.6 1.6 0.4 1.3 1.1 10.0
727 537 Thaddeus Young PF 32 CHI 68 23 24.3 5.4 9.7 ... .628 2.5 3.8 6.2 4.3 1.1 0.6 2.0 2.2 12.1
728 538 Trae Young PG 22 ATL 63 63 33.7 7.7 17.7 ... .886 0.6 3.3 3.9 9.4 0.8 0.2 4.1 1.8 25.3
729 539 Cody Zeller C 28 CHO 48 21 20.9 3.8 6.8 ... .714 2.5 4.4 6.8 1.8 0.6 0.4 1.1 2.5 9.4
730 540 Ivica Zubac C 23 LAC 72 33 22.3 3.6 5.5 ... .789 2.6 4.6 7.2 1.3 0.3 0.9 1.1 2.6 9.0

705 rows × 30 columns

In [11]:
df=df.drop(['Rk'],axis=1)
df
Out[11]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
0 Precious Achiuwa PF 21 MIA 61 4 12.1 2.0 3.7 .544 ... .509 1.2 2.2 3.4 0.5 0.3 0.5 0.7 1.5 5.0
1 Jaylen Adams PG 24 MIL 7 0 2.6 0.1 1.1 .125 ... 0 0.0 0.4 0.4 0.3 0.0 0.0 0.0 0.1 0.3
2 Steven Adams C 27 NOP 58 58 27.7 3.3 5.3 .614 ... .444 3.7 5.2 8.9 1.9 0.9 0.7 1.3 1.9 7.6
3 Bam Adebayo C 23 MIA 64 64 33.5 7.1 12.5 .570 ... .799 2.2 6.7 9.0 5.4 1.2 1.0 2.6 2.3 18.7
4 LaMarcus Aldridge C 35 TOT 26 23 25.9 5.4 11.4 .473 ... .872 0.7 3.8 4.5 1.9 0.4 1.1 1.0 1.8 13.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
726 Delon Wright PG 28 SAC 27 8 25.8 3.9 8.3 .462 ... .833 1.0 2.9 3.9 3.6 1.6 0.4 1.3 1.1 10.0
727 Thaddeus Young PF 32 CHI 68 23 24.3 5.4 9.7 .559 ... .628 2.5 3.8 6.2 4.3 1.1 0.6 2.0 2.2 12.1
728 Trae Young PG 22 ATL 63 63 33.7 7.7 17.7 .438 ... .886 0.6 3.3 3.9 9.4 0.8 0.2 4.1 1.8 25.3
729 Cody Zeller C 28 CHO 48 21 20.9 3.8 6.8 .559 ... .714 2.5 4.4 6.8 1.8 0.6 0.4 1.1 2.5 9.4
730 Ivica Zubac C 23 LAC 72 33 22.3 3.6 5.5 .652 ... .789 2.6 4.6 7.2 1.3 0.3 0.9 1.1 2.6 9.0

705 rows × 29 columns

In [12]:
df.to_csv("nba2021.csv",index=False)
In [20]:
df=pd.read_csv("nba2021.csv")
df
Out[20]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
0 Precious Achiuwa PF 21 MIA 61 4 12.1 2.0 3.7 0.544 ... 0.509 1.2 2.2 3.4 0.5 0.3 0.5 0.7 1.5 5.0
1 Jaylen Adams PG 24 MIL 7 0 2.6 0.1 1.1 0.125 ... 0.000 0.0 0.4 0.4 0.3 0.0 0.0 0.0 0.1 0.3
2 Steven Adams C 27 NOP 58 58 27.7 3.3 5.3 0.614 ... 0.444 3.7 5.2 8.9 1.9 0.9 0.7 1.3 1.9 7.6
3 Bam Adebayo C 23 MIA 64 64 33.5 7.1 12.5 0.570 ... 0.799 2.2 6.7 9.0 5.4 1.2 1.0 2.6 2.3 18.7
4 LaMarcus Aldridge C 35 TOT 26 23 25.9 5.4 11.4 0.473 ... 0.872 0.7 3.8 4.5 1.9 0.4 1.1 1.0 1.8 13.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
700 Delon Wright PG 28 SAC 27 8 25.8 3.9 8.3 0.462 ... 0.833 1.0 2.9 3.9 3.6 1.6 0.4 1.3 1.1 10.0
701 Thaddeus Young PF 32 CHI 68 23 24.3 5.4 9.7 0.559 ... 0.628 2.5 3.8 6.2 4.3 1.1 0.6 2.0 2.2 12.1
702 Trae Young PG 22 ATL 63 63 33.7 7.7 17.7 0.438 ... 0.886 0.6 3.3 3.9 9.4 0.8 0.2 4.1 1.8 25.3
703 Cody Zeller C 28 CHO 48 21 20.9 3.8 6.8 0.559 ... 0.714 2.5 4.4 6.8 1.8 0.6 0.4 1.1 2.5 9.4
704 Ivica Zubac C 23 LAC 72 33 22.3 3.6 5.5 0.652 ... 0.789 2.6 4.6 7.2 1.3 0.3 0.9 1.1 2.6 9.0

705 rows × 29 columns

In [25]:
pd.set_option('display.max_rows', 10)
In [27]:
df.dtypes
Out[27]:
Player     object
Pos        object
Age         int64
Tm         object
G           int64
           ...   
STL       float64
BLK       float64
TOV       float64
PF        float64
PTS       float64
Length: 29, dtype: object
In [72]:
df.select_dtypes(include=['number'])\
  Input In [72]
    df.select_dtypes(include=['number'])\
                                         ^
SyntaxError: unexpected EOF while parsing
In [29]:
df.select_dtypes(include=['object'])
Out[29]:
Player Pos Tm
0 Precious Achiuwa PF MIA
1 Jaylen Adams PG MIL
2 Steven Adams C NOP
3 Bam Adebayo C MIA
4 LaMarcus Aldridge C TOT
... ... ... ...
700 Delon Wright PG SAC
701 Thaddeus Young PF CHI
702 Trae Young PG ATL
703 Cody Zeller C CHO
704 Ivica Zubac C LAC

705 rows × 3 columns

In [32]:
playerpoint=df[df.PTS == df.PTS.max()]
In [33]:
playerpoint
Out[33]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
151 Stephen Curry PG 32 GSW 63 63 34.2 10.4 21.7 0.482 ... 0.916 0.5 5.0 5.5 5.8 1.2 0.1 3.4 1.9 32.0

1 rows × 29 columns

In [35]:
playerpoint.Tm
Out[35]:
151    GSW
Name: Tm, dtype: object
In [38]:
playerpoint.Pos
Out[38]:
151    PG
Name: Pos, dtype: object
In [39]:
playerpoint.G
Out[39]:
151    63
Name: G, dtype: int64
In [40]:
df[df.PTS>20]
Out[40]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
17 Giannis Antetokounmpo PF 26 MIL 61 61 33.0 10.3 18.0 0.569 ... 0.685 1.6 9.4 11.0 5.9 1.2 1.2 3.4 2.8 28.1
45 Bradley Beal SG 27 WAS 60 60 35.8 11.2 23.0 0.485 ... 0.889 1.2 3.5 4.7 4.4 1.2 0.4 3.1 2.3 31.3
71 Devin Booker SG 24 PHO 67 67 33.9 9.3 19.2 0.484 ... 0.867 0.5 3.7 4.2 4.3 0.8 0.2 3.1 2.7 25.6
89 Malcolm Brogdon PG 28 IND 56 56 34.5 7.9 17.5 0.453 ... 0.864 1.0 4.2 5.3 5.9 0.9 0.3 2.1 2.0 21.2
94 Jaylen Brown SG 24 BOS 58 58 34.5 9.3 19.2 0.484 ... 0.764 1.2 4.8 6.0 3.4 1.2 0.6 2.7 2.9 24.7
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
665 John Wall PG 30 HOU 40 40 32.2 7.3 18.2 0.404 ... 0.749 0.4 2.8 3.2 6.9 1.1 0.8 3.5 1.2 20.6
675 Russell Westbrook PG 32 WAS 65 65 36.4 8.4 19.0 0.439 ... 0.656 1.7 9.9 11.5 11.7 1.4 0.4 4.8 2.9 22.2
688 Zion Williamson PF 20 NOP 61 61 33.2 10.4 17.0 0.611 ... 0.698 2.7 4.5 7.2 3.7 0.9 0.6 2.7 2.2 27.0
696 Christian Wood C 25 HOU 41 41 32.3 8.0 15.6 0.514 ... 0.631 1.9 7.8 9.6 1.7 0.8 1.2 2.0 2.1 21.0
702 Trae Young PG 22 ATL 63 63 33.7 7.7 17.7 0.438 ... 0.886 0.6 3.3 3.9 9.4 0.8 0.2 4.1 1.8 25.3

49 rows × 29 columns

In [41]:
df[df['3P']==df['3P'].max()]
Out[41]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
151 Stephen Curry PG 32 GSW 63 63 34.2 10.4 21.7 0.482 ... 0.916 0.5 5.0 5.5 5.8 1.2 0.1 3.4 1.9 32.0

1 rows × 29 columns

In [42]:
df[df['AST']==df['AST'].max()]
Out[42]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
675 Russell Westbrook PG 32 WAS 65 65 36.4 8.4 19.0 0.439 ... 0.656 1.7 9.9 11.5 11.7 1.4 0.4 4.8 2.9 22.2

1 rows × 29 columns

In [47]:
LAL=df.groupby('Tm').get_group('LAL')
In [49]:
LAL[LAL.PTS == LAL.PTS.max()]
Out[49]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
329 LeBron James PG 36 LAL 45 45 33.4 9.4 18.3 0.513 ... 0.698 0.6 7.0 7.7 7.8 1.1 0.6 3.7 1.6 25.0

1 rows × 29 columns

In [50]:
df.groupby('Pos').PTS.describe()
Out[50]:
count mean std min 25% 50% 75% max
Pos
C 138.0 8.451449 5.648205 0.0 4.775 7.55 11.200 28.5
C-PF 2.0 8.450000 7.141778 3.4 5.925 8.45 10.975 13.5
PF 143.0 7.484615 5.924184 0.0 3.150 6.00 10.050 28.1
PF-C 1.0 7.000000 NaN 7.0 7.000 7.00 7.000 7.0
PF-SF 2.0 4.150000 1.484924 3.1 3.625 4.15 4.675 5.2
... ... ... ... ... ... ... ... ...
SF-PF 3.0 5.366667 4.119871 1.5 3.200 4.90 7.300 9.7
SF-SG 3.0 9.733333 6.833984 3.6 6.050 8.50 12.800 17.1
SG 162.0 9.485185 6.427515 0.1 4.400 8.25 12.425 31.3
SG-PG 2.0 8.550000 2.333452 6.9 7.725 8.55 9.375 10.2
SG-SF 2.0 15.100000 4.949747 11.6 13.350 15.10 16.850 18.6

13 rows × 8 columns

In [52]:
positions=['C','PF','SG','SF','PG']
POS= df[df['Pos'].isin(positions)]
POS
Out[52]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
0 Precious Achiuwa PF 21 MIA 61 4 12.1 2.0 3.7 0.544 ... 0.509 1.2 2.2 3.4 0.5 0.3 0.5 0.7 1.5 5.0
1 Jaylen Adams PG 24 MIL 7 0 2.6 0.1 1.1 0.125 ... 0.000 0.0 0.4 0.4 0.3 0.0 0.0 0.0 0.1 0.3
2 Steven Adams C 27 NOP 58 58 27.7 3.3 5.3 0.614 ... 0.444 3.7 5.2 8.9 1.9 0.9 0.7 1.3 1.9 7.6
3 Bam Adebayo C 23 MIA 64 64 33.5 7.1 12.5 0.570 ... 0.799 2.2 6.7 9.0 5.4 1.2 1.0 2.6 2.3 18.7
4 LaMarcus Aldridge C 35 TOT 26 23 25.9 5.4 11.4 0.473 ... 0.872 0.7 3.8 4.5 1.9 0.4 1.1 1.0 1.8 13.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
700 Delon Wright PG 28 SAC 27 8 25.8 3.9 8.3 0.462 ... 0.833 1.0 2.9 3.9 3.6 1.6 0.4 1.3 1.1 10.0
701 Thaddeus Young PF 32 CHI 68 23 24.3 5.4 9.7 0.559 ... 0.628 2.5 3.8 6.2 4.3 1.1 0.6 2.0 2.2 12.1
702 Trae Young PG 22 ATL 63 63 33.7 7.7 17.7 0.438 ... 0.886 0.6 3.3 3.9 9.4 0.8 0.2 4.1 1.8 25.3
703 Cody Zeller C 28 CHO 48 21 20.9 3.8 6.8 0.559 ... 0.714 2.5 4.4 6.8 1.8 0.6 0.4 1.1 2.5 9.4
704 Ivica Zubac C 23 LAC 72 33 22.3 3.6 5.5 0.652 ... 0.789 2.6 4.6 7.2 1.3 0.3 0.9 1.1 2.6 9.0

689 rows × 29 columns

In [53]:
POS.groupby('Pos').PTS.describe()
Out[53]:
count mean std min 25% 50% 75% max
Pos
C 138.0 8.451449 5.648205 0.0 4.775 7.55 11.200 28.5
PF 143.0 7.484615 5.924184 0.0 3.150 6.00 10.050 28.1
PG 127.0 9.625984 7.062737 0.0 4.450 7.50 13.250 32.0
SF 119.0 7.811765 6.013081 0.0 3.900 6.10 10.900 26.4
SG 162.0 9.485185 6.427515 0.1 4.400 8.25 12.425 31.3
In [54]:
PTS= df[['Pos','PTS']]
positions=['C','PF','SF','PG','SG']
PTS=PTS[PTS['Pos'].isin(positions)]
PTS
Out[54]:
Pos PTS
0 PF 5.0
1 PG 0.3
2 C 7.6
3 C 18.7
4 C 13.5
... ... ...
700 PG 10.0
701 PF 12.1
702 PG 25.3
703 C 9.4
704 C 9.0

689 rows × 2 columns

In [55]:
PTS['PTS'].hist(by=PTS['Pos'])
Out[55]:
array([[<AxesSubplot:title={'center':'C'}>,
        <AxesSubplot:title={'center':'PF'}>],
       [<AxesSubplot:title={'center':'PG'}>,
        <AxesSubplot:title={'center':'SF'}>],
       [<AxesSubplot:title={'center':'SG'}>, <AxesSubplot:>]],
      dtype=object)
In [56]:
PTS['PTS'].hist(by=PTS['Pos'], layout=(1,5))
Out[56]:
array([<AxesSubplot:title={'center':'C'}>,
       <AxesSubplot:title={'center':'PF'}>,
       <AxesSubplot:title={'center':'PG'}>,
       <AxesSubplot:title={'center':'SF'}>,
       <AxesSubplot:title={'center':'SG'}>], dtype=object)
In [57]:
PTS['PTS'].hist(by=PTS['Pos'], layout=(1,5), figsize=(16,2))
Out[57]:
array([<AxesSubplot:title={'center':'C'}>,
       <AxesSubplot:title={'center':'PF'}>,
       <AxesSubplot:title={'center':'PG'}>,
       <AxesSubplot:title={'center':'SF'}>,
       <AxesSubplot:title={'center':'SG'}>], dtype=object)
In [58]:
import seaborn as sns
import matplotlib.pyplot as plt

g=sns.FacetGrid(PTS, col="Pos")
g.map(plt.hist,"PTS");
In [59]:
PTS.boxplot(column='PTS',by='Pos')
Out[59]:
<AxesSubplot:title={'center':'PTS'}, xlabel='Pos'>
In [61]:
import seaborn as sns
sns.boxplot(x='Pos',y='PTS',data=PTS)
Out[61]:
<AxesSubplot:xlabel='Pos', ylabel='PTS'>
In [63]:
sns.boxplot(x='Pos',y='PTS',data=PTS)
sns.stripplot(x='Pos',y='PTS',data=PTS, 
              jitter=True, 
              marker='o',
              alpha=0.8,
             color="black")
Out[63]:
<AxesSubplot:xlabel='Pos', ylabel='PTS'>
In [73]:
corr=df.corr()
corr
Out[73]:
Age G GS MP FG FGA FG% 3P 3PA 3P% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
Age 1.000000 -0.345517 -0.283681 -0.192756 -0.262490 -0.172875 -0.370059 0.059354 0.053957 0.056855 ... -0.034809 -0.355144 -0.325032 -0.354690 -0.000381 -0.096071 -0.349642 -0.162726 -0.347479 -0.225235
G -0.345517 1.000000 0.648273 0.470698 0.365466 0.360958 -0.077819 0.355054 0.341830 0.202187 ... 0.194120 0.016228 0.235345 0.172882 0.269564 0.384014 -0.085464 0.250864 0.278208 0.358279
GS -0.283681 0.648273 1.000000 0.904311 0.858556 0.835419 -0.316770 0.512978 0.545819 -0.054649 ... -0.034834 0.233035 0.709844 0.584480 0.729174 0.764005 0.166020 0.806211 0.664151 0.844115
MP -0.192756 0.470698 0.904311 1.000000 0.949435 0.961235 -0.447310 0.686779 0.725822 0.092808 ... 0.119708 0.081652 0.671941 0.504309 0.870400 0.897641 0.038798 0.909048 0.674765 0.951051
FG -0.262490 0.365466 0.858556 0.949435 1.000000 0.979139 -0.321795 0.608308 0.647975 0.023557 ... 0.069167 0.141489 0.695303 0.541744 0.867253 0.815677 0.083778 0.940158 0.617646 0.993363
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
STL -0.096071 0.384014 0.764005 0.897641 0.815677 0.849021 -0.469837 0.619763 0.661657 0.111419 ... 0.168319 -0.030743 0.504083 0.346050 0.875917 1.000000 -0.064736 0.834164 0.554084 0.823366
BLK -0.349642 -0.085464 0.166020 0.038798 0.083778 -0.075097 0.479992 -0.595553 -0.559639 -0.791483 ... -0.605296 0.916050 0.675897 0.799923 -0.228683 -0.064736 1.000000 0.068952 0.634553 -0.006807
TOV -0.162726 0.250864 0.806211 0.909048 0.940158 0.937102 -0.434901 0.549428 0.601504 -0.036053 ... 0.019052 0.117118 0.669709 0.515018 0.934232 0.834164 0.068952 1.000000 0.616205 0.942888
PF -0.347479 0.278208 0.664151 0.674765 0.617646 0.532462 0.023450 0.048993 0.099438 -0.404959 ... -0.263962 0.640282 0.853605 0.828871 0.386731 0.554084 0.634553 0.616205 1.000000 0.560818
PTS -0.225235 0.358279 0.844115 0.951051 0.993363 0.992835 -0.398770 0.679562 0.717078 0.099527 ... 0.133879 0.041110 0.628774 0.459399 0.896770 0.823366 -0.006807 0.942888 0.560818 1.000000

26 rows × 26 columns

In [66]:
sns.heatmap(corr)
Out[66]:
<AxesSubplot:>
In [67]:
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax= plt.subplots(figsize=(7,5))
sns.heatmap(corr,square=True)
Out[67]:
<AxesSubplot:>
In [68]:
# https://seaborn.pydata.org/generated/seaborn.heatmap.html

import numpy as np
import seaborn as sns

mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True
with sns.axes_style("white"):
    f, ax = plt.subplots(figsize=(7, 5))
    ax = sns.heatmap(corr, mask=mask, vmax=1, square=True)
In [74]:
df
Out[74]:
Age G GS MP FG FGA FG% 3P 3PA 3P% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
Age 1.000000 0.061119 0.092335 0.198153 0.127956 0.132420 0.048242 0.185962 0.163591 0.113262 ... 0.129573 0.017896 0.139539 0.111839 0.226687 0.178456 0.042726 0.121069 0.111609 0.138877
G 0.061119 1.000000 0.644453 0.553135 0.472687 0.441459 0.296131 0.397096 0.362864 0.297257 ... 0.322548 0.264688 0.404790 0.389992 0.337747 0.409568 0.181272 0.325467 0.392205 0.465707
GS 0.092335 0.644453 1.000000 0.764826 0.716464 0.692610 0.221514 0.511321 0.499250 0.187241 ... 0.209549 0.370520 0.629791 0.593427 0.550817 0.553341 0.322566 0.591306 0.536160 0.713721
MP 0.198153 0.553135 0.764826 1.000000 0.879032 0.888528 0.256452 0.693692 0.707618 0.326397 ... 0.373425 0.381314 0.730310 0.672641 0.712777 0.753880 0.358051 0.736907 0.705754 0.879928
FG 0.127956 0.472687 0.716464 0.879032 1.000000 0.975263 0.325081 0.669591 0.669400 0.287325 ... 0.321964 0.381583 0.724941 0.668630 0.720105 0.652066 0.340571 0.794838 0.595233 0.990473
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
STL 0.178456 0.409568 0.553341 0.753880 0.652066 0.669474 0.177922 0.486753 0.504022 0.242138 ... 0.315748 0.234928 0.502869 0.454899 0.680935 1.000000 0.238287 0.603082 0.531279 0.649888
BLK 0.042726 0.181272 0.322566 0.358051 0.340571 0.248033 0.409630 -0.034478 -0.041055 -0.139143 ... 0.001805 0.653452 0.575798 0.639057 0.087960 0.238287 1.000000 0.264079 0.538745 0.303450
TOV 0.121069 0.325467 0.591306 0.736907 0.794838 0.802002 0.144745 0.496285 0.523658 0.176498 ... 0.212927 0.288535 0.622156 0.561929 0.822754 0.603082 0.264079 1.000000 0.564525 0.806638
PF 0.111609 0.392205 0.536160 0.705754 0.595233 0.563711 0.337232 0.322891 0.339857 0.133237 ... 0.225081 0.530736 0.666796 0.669975 0.399748 0.531279 0.538745 0.564525 1.000000 0.576618
PTS 0.138877 0.465707 0.713721 0.879928 0.990473 0.980010 0.278695 0.722054 0.722231 0.316505 ... 0.353171 0.320283 0.696320 0.627926 0.740902 0.649888 0.303450 0.806638 0.576618 1.000000

26 rows × 26 columns

In [76]:
dataframe=pd.read_csv("nba2021.csv")
dataframe
Out[76]:
Player Pos Age Tm G GS MP FG FGA FG% ... FT% ORB DRB TRB AST STL BLK TOV PF PTS
0 Precious Achiuwa PF 21 MIA 61 4 12.1 2.0 3.7 0.544 ... 0.509 1.2 2.2 3.4 0.5 0.3 0.5 0.7 1.5 5.0
1 Jaylen Adams PG 24 MIL 7 0 2.6 0.1 1.1 0.125 ... 0.000 0.0 0.4 0.4 0.3 0.0 0.0 0.0 0.1 0.3
2 Steven Adams C 27 NOP 58 58 27.7 3.3 5.3 0.614 ... 0.444 3.7 5.2 8.9 1.9 0.9 0.7 1.3 1.9 7.6
3 Bam Adebayo C 23 MIA 64 64 33.5 7.1 12.5 0.570 ... 0.799 2.2 6.7 9.0 5.4 1.2 1.0 2.6 2.3 18.7
4 LaMarcus Aldridge C 35 TOT 26 23 25.9 5.4 11.4 0.473 ... 0.872 0.7 3.8 4.5 1.9 0.4 1.1 1.0 1.8 13.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
700 Delon Wright PG 28 SAC 27 8 25.8 3.9 8.3 0.462 ... 0.833 1.0 2.9 3.9 3.6 1.6 0.4 1.3 1.1 10.0
701 Thaddeus Young PF 32 CHI 68 23 24.3 5.4 9.7 0.559 ... 0.628 2.5 3.8 6.2 4.3 1.1 0.6 2.0 2.2 12.1
702 Trae Young PG 22 ATL 63 63 33.7 7.7 17.7 0.438 ... 0.886 0.6 3.3 3.9 9.4 0.8 0.2 4.1 1.8 25.3
703 Cody Zeller C 28 CHO 48 21 20.9 3.8 6.8 0.559 ... 0.714 2.5 4.4 6.8 1.8 0.6 0.4 1.1 2.5 9.4
704 Ivica Zubac C 23 LAC 72 33 22.3 3.6 5.5 0.652 ... 0.789 2.6 4.6 7.2 1.3 0.3 0.9 1.1 2.6 9.0

705 rows × 29 columns

In [78]:
number= dataframe.select_dtypes(include=["number"])
In [79]:
number.iloc[:,:5]
Out[79]:
Age G GS MP FG
0 21 61 4 12.1 2.0
1 24 7 0 2.6 0.1
2 27 58 58 27.7 3.3
3 23 64 64 33.5 7.1
4 35 26 23 25.9 5.4
... ... ... ... ... ...
700 28 27 8 25.8 3.9
701 32 68 23 24.3 5.4
702 22 63 63 33.7 7.7
703 28 48 21 20.9 3.8
704 23 72 33 22.3 3.6

705 rows × 5 columns

In [83]:
selections = ['Age', 'G', 'STL', 'BLK', 'AST', 'PTS']
newdf= dataframe[selections]
newdf
Out[83]:
Age G STL BLK AST PTS
0 21 61 0.3 0.5 0.5 5.0
1 24 7 0.0 0.0 0.3 0.3
2 27 58 0.9 0.7 1.9 7.6
3 23 64 1.2 1.0 5.4 18.7
4 35 26 0.4 1.1 1.9 13.5
... ... ... ... ... ... ...
700 28 27 1.6 0.4 3.6 10.0
701 32 68 1.1 0.6 4.3 12.1
702 22 63 0.8 0.2 9.4 25.3
703 28 48 0.6 0.4 1.8 9.4
704 23 72 0.3 0.9 1.3 9.0

705 rows × 6 columns

In [85]:
import seaborn as sns

g=sns.PairGrid(newdf)
g.map(plt.scatter);
In [88]:
import seaborn as sns
g=sns.PairGrid(number)
g.map(plt.scatter)
Out[88]:
<seaborn.axisgrid.PairGrid at 0x1f93e280a00>